home *** CD-ROM | disk | FTP | other *** search
Text File | 2000-10-06 | 16.2 KB | 690 lines | [TEXT/CWIE] |
- ///--------------------------------------------------------------------------------------
- // BlitPixieMask - a fast masked blitter
- //
- // Ideas and code snippets contributed by:
- // Ben Sharpe, Brigham Stevens, Sean Callahan, Joe Britt and Tim Collins
- //
- // Portions are copyright: © 1991-94 Tony Myles
- ///--------------------------------------------------------------------------------------
-
- #include <stdint.h>
- #include <string.h>
-
- #ifndef __BLITPIXIE__
- #include "BlitPixieHeader.h"
- #endif
-
- #include "BlitPixieAsm.h"
-
- #pragma mark *** PowerPC asm:
- #if USE_PPC_ASSEMBLY
-
- ///--------------------------------------------------------------------------------------
- // BlitPixieMask - PowerPC asm implementation by Anders F Björklund
- ///--------------------------------------------------------------------------------------
- // Masked blit: each destination unit becomes (dst & mask) | src, one row at a time.
- ASM_FUNC void BlitPixieMask(
- register unsigned char *srcPixelP, // r3: start of the first source row
- register unsigned char *dstPixelP, // r4: start of the first destination row
- register unsigned char *maskPixelP, // r5: start of the first mask row
- register unsigned long srcOffset, // r6: added to the source AND mask pointers after each row
- register unsigned long dstOffset, // r7: added to the destination pointer after each row
- register unsigned short numBytesPerRow, // r8: bytes to process in each row
- register unsigned short rowsToCopy) // r9: rows to process; only tested after a row has been done
- {
- #define r_srcPixelP r3
- #define r_dstPixelP r4
- #define r_maskPixelP r5
- #define r_srcOffset r6
- #define r_dstOffset r7
- #define r_numBytesPerRow r8
- #define r_rowsToCopy r9
- // scratch registers used by the loop bodies
- #define r_srcPixel r10
- #define r_dstPixel r11
- #define r_index r12
- #define r_maskPixel r0
-
- ASM_BEGIN
- // split the row length into an odd-byte flag (r10), a halfword flag (r11) and a word count (r8)
- rlwinm r10,r_numBytesPerRow,0,31,31 // numBytesPerRow & 1
- rlwinm r11,r_numBytesPerRow,31,31,31 // numBytesPerRow & 2
- rlwinm r_numBytesPerRow,r_numBytesPerRow,30,2,31 // numBytesPerRow >> 2
- // latch the three tests in cr5-cr7 once: r10/r11 are reused as pixel scratch inside the loops
- cmplwi cr7,r10,0 // cr7 EQ = no trailing byte
- cmplwi cr6,r11,0 // cr6 EQ = no trailing halfword
- cmplwi cr5,r_numBytesPerRow,0 // cr5 EQ = no whole words
-
- @yloop:
- li r_index,0 // byte offset into the current row, restarted for every row
-
- beq cr5,@skipwords
- mtctr r_numBytesPerRow // CTR = whole words per row
- @wordloop:
- lwzx r_maskPixel,r_maskPixelP,r_index
- lwzx r_dstPixel,r_dstPixelP,r_index
- lwzx r_srcPixel,r_srcPixelP,r_index
- and r_dstPixel,r_dstPixel,r_maskPixel // keep destination bits where the mask is set
- or r_maskPixel,r_srcPixel,r_dstPixel // OR in the source (the source itself is not masked)
- stwx r_maskPixel,r_dstPixelP,r_index
- addi r_index,r_index,4
- bdnz @wordloop // decrement CTR and repeat while it is non-zero
- @skipwords:
- // optional trailing halfword, same combination as the word loop
- beq cr6,@skipshort
- lhzx r_maskPixel,r_maskPixelP,r_index
- lhzx r_dstPixel,r_dstPixelP,r_index
- lhzx r_srcPixel,r_srcPixelP,r_index
- and r_dstPixel,r_dstPixel,r_maskPixel
- or r_maskPixel,r_srcPixel,r_dstPixel
- sthx r_maskPixel,r_dstPixelP,r_index
- addi r_index,r_index,2
- @skipshort:
- // optional trailing byte
- beq cr7,@skipbyte
- lbzx r_maskPixel,r_maskPixelP,r_index
- lbzx r_dstPixel,r_dstPixelP,r_index
- lbzx r_srcPixel,r_srcPixelP,r_index
- and r_dstPixel,r_dstPixel,r_maskPixel
- or r_maskPixel,r_srcPixel,r_dstPixel
- stbx r_maskPixel,r_dstPixelP,r_index
- addi r_index,r_index,1
- @skipbyte:
-
- subic. r_rowsToCopy,r_rowsToCopy,1 // decrement the row count and record the result in cr0
- // bump to the next row; the pointers were never advanced (r_index was used), so the offsets are added as given
- add r_srcPixelP,r_srcPixelP,r_srcOffset
- add r_dstPixelP,r_dstPixelP,r_dstOffset
- add r_maskPixelP,r_maskPixelP,r_srcOffset // the mask advances by the source offset
-
- bne @yloop // tests the cr0 result of the subic. above
-
- ASM_END
- }
-
- ///--------------------------------------------------------------------------------------
- // BlitPixiePartialMask - PowerPC asm implementation by Anders F Björklund
- ///--------------------------------------------------------------------------------------
-
- ASM_FUNC void BlitPixiePartialMask(
- register unsigned char *srcPixelP, // r3
- register unsigned char *dstPixelP, // r4
- register unsigned char *maskPixelP, // r5
- register unsigned long srcOffset, // r6
- register unsigned long dstOffset, // r7
- register unsigned short numBytesPerRow, // r8
- register unsigned short rowsToCopy) // r9
- {
- #define r_srcPixelP r3
- #define r_dstPixelP r4
- #define r_maskPixelP r5
- #define r_srcOffset r6
- #define r_dstOffset r7
- #define r_numBytesPerRow r8
- #define r_rowsToCopy r9
-
- #define r_srcPixel r10
- #define r_dstPixel r11
- #define r_index r12
- #define r_maskPixel r0
-
- ASM_BEGIN
-
- rlwinm r10,r_numBytesPerRow,0,31,31 // numBytesPerRow & 1
- rlwinm r11,r_numBytesPerRow,31,31,31 // numBytesPerRow & 2
- rlwinm r_numBytesPerRow,r_numBytesPerRow,30,2,31 // numBytesPerRow >> 2
-
- cmplwi cr7,r10,0
- cmplwi cr6,r11,0
- cmplwi cr5,r_numBytesPerRow,0
-
- @yloop:
- li r_index,0
-
- beq cr5,@skipwords
- mtctr numBytesPerRow
- @wordloop:
- lwzx r_maskPixel,r_maskPixelP,r_index
- lwzx r_srcPixel,r_srcPixelP,r_index
- lwzx r_dstPixel,r_dstPixelP,r_index
- andc r_srcPixel,r_srcPixel,r_maskPixel
- and r_dstPixel,r_dstPixel,r_maskPixel
- or r_maskPixel,r_srcPixel,r_dstPixel
- stwx r_maskPixel,r_dstPixelP,r_index
- addi r_index,r_index,4
- bdnz @wordloop
- @skipwords:
-
- beq cr6,@skipshort
- lhzx r_maskPixel,r_maskPixelP,r_index
- lhzx r_srcPixel,r_srcPixelP,r_index
- lhzx r_dstPixel,r_dstPixelP,r_index
- andc r_srcPixel,r_srcPixel,r_maskPixel
- and r_dstPixel,r_dstPixel,r_maskPixel
- or r_maskPixel,r_srcPixel,r_dstPixel
- sthx r_maskPixel,r_dstPixelP,r_index
- addi r_index,r_index,2
- @skipshort:
-
- beq cr7,@skipbyte
- lbzx r_maskPixel,maskPixelP,r_index
- lbzx r_srcPixel,r_srcPixelP,r_index
- lbzx r_dstPixel,r_dstPixelP,r_index
- andc r_srcPixel,r_srcPixel,r_maskPixel
- and r_dstPixel,r_dstPixel,r_maskPixel
- or r_maskPixel,r_srcPixel,r_dstPixel
- stbx r_maskPixel,r_dstPixelP,r_index
- addi r_index,r_index,1
- @skipbyte:
-
- subic. r_rowsToCopy,r_rowsToCopy,1
-
- add r_srcPixelP,r_srcPixelP,r_srcOffset
- add r_dstPixelP,r_dstPixelP,r_dstOffset
- add r_maskPixelP,r_maskPixelP,r_srcOffset
-
- bne @yloop
-
- ASM_END
- }
-
- #pragma mark *** 680X0 asm:
- #elif USE_68K_ASSEMBLY
-
- ///--------------------------------------------------------------------------------------
- // BlitPixieMask - 680X0 asm: dst = (dst & mask) | src for numBytesPerRow bytes in each row
- ///--------------------------------------------------------------------------------------
-
- ASM_FUNC void BlitPixieMask(
- register unsigned char *srcPixelP, // source pixels; post-incremented as they are read
- register unsigned char *dstPixelP, // destination pixels; read, combined and written back
- register unsigned char *maskPixelP, // mask; post-incremented in step with the source
- register unsigned long srcRowStride, // row stride applied to both the source and the mask
- register unsigned long dstRowStride, // row stride applied to the destination
- register unsigned short numBytesPerRow, // bytes to process per row
- register unsigned short rowsToCopy) // rows to process; only tested after a row has been done
- {
- register unsigned long loopsPerRow; // full 16-longword passes per row
-
- ASM_BEGIN
- // the pointers advance by numBytesPerRow inside a row, so keep only the rest of each stride
- ext.l numBytesPerRow // NOTE(review): ext.l sign-extends although the parameter is unsigned short - confirm rows never reach 0x8000 bytes
- sub.l numBytesPerRow, srcRowStride
- sub.l numBytesPerRow, dstRowStride
-
- // longWordsPerRow = numBytesPerRow >> 2;
- move.l numBytesPerRow, d0
- lsr.l #2, d0
-
- // numBytesPerRow -= longWordsPerRow << 2;
- move.l d0, d1
- lsl.l #2, d1
- sub.l d1, numBytesPerRow // numBytesPerRow now holds the 0-3 bytes left over after the longwords
-
- // loopsPerRow = longWordsPerRow >> 4;
- move.l d0, loopsPerRow
- lsr.l #4, loopsPerRow
-
- // d0 = longWordsPerRow & 15: unrolled steps to run on the first, partial pass
- moveq #0xF, d1
- and.l d1, d0
- lsl.l #3, d0 // longWordsPerRow *= 8; presumably the encoded size in bytes of one unrolled step - TODO confirm
- lea @loopEnd, a0 // get address of the end of the loop
- sub.l d0, a0 // calculate where to jmp in the loop
-
- @forEachRow:
- move.l loopsPerRow, d2 // d2 counts the full passes still to run after the partial one
- jmp (a0) // enter the unrolled loop part-way through
- @loopBase:
- // 16 (every step: load dst, AND with mask, OR in source, store back)
- move.l (dstPixelP), d0
- and.l (maskPixelP)+, d0
- or.l (srcPixelP)+, d0
- move.l d0, (dstPixelP)+
- // 15
- move.l (dstPixelP), d0
- and.l (maskPixelP)+, d0
- or.l (srcPixelP)+, d0
- move.l d0, (dstPixelP)+
- // 14
- move.l (dstPixelP), d0
- and.l (maskPixelP)+, d0
- or.l (srcPixelP)+, d0
- move.l d0, (dstPixelP)+
- // 13
- move.l (dstPixelP), d0
- and.l (maskPixelP)+, d0
- or.l (srcPixelP)+, d0
- move.l d0, (dstPixelP)+
- // 12
- move.l (dstPixelP), d0
- and.l (maskPixelP)+, d0
- or.l (srcPixelP)+, d0
- move.l d0, (dstPixelP)+
- // 11
- move.l (dstPixelP), d0
- and.l (maskPixelP)+, d0
- or.l (srcPixelP)+, d0
- move.l d0, (dstPixelP)+
- // 10
- move.l (dstPixelP), d0
- and.l (maskPixelP)+, d0
- or.l (srcPixelP)+, d0
- move.l d0, (dstPixelP)+
- // 9
- move.l (dstPixelP), d0
- and.l (maskPixelP)+, d0
- or.l (srcPixelP)+, d0
- move.l d0, (dstPixelP)+
- // 8
- move.l (dstPixelP), d0
- and.l (maskPixelP)+, d0
- or.l (srcPixelP)+, d0
- move.l d0, (dstPixelP)+
- // 7
- move.l (dstPixelP), d0
- and.l (maskPixelP)+, d0
- or.l (srcPixelP)+, d0
- move.l d0, (dstPixelP)+
- // 6
- move.l (dstPixelP), d0
- and.l (maskPixelP)+, d0
- or.l (srcPixelP)+, d0
- move.l d0, (dstPixelP)+
- // 5
- move.l (dstPixelP), d0
- and.l (maskPixelP)+, d0
- or.l (srcPixelP)+, d0
- move.l d0, (dstPixelP)+
- // 4
- move.l (dstPixelP), d0
- and.l (maskPixelP)+, d0
- or.l (srcPixelP)+, d0
- move.l d0, (dstPixelP)+
- // 3
- move.l (dstPixelP), d0
- and.l (maskPixelP)+, d0
- or.l (srcPixelP)+, d0
- move.l d0, (dstPixelP)+
- // 2
- move.l (dstPixelP), d0
- and.l (maskPixelP)+, d0
- or.l (srcPixelP)+, d0
- move.l d0, (dstPixelP)+
- // 1
- move.l (dstPixelP), d0
- and.l (maskPixelP)+, d0
- or.l (srcPixelP)+, d0
- move.l d0, (dstPixelP)+
- @loopEnd:
- subq.l #1, d2
- bpl @loopBase // run another full pass of 16 steps while d2 >= 0
-
- // now do any leftover bits
- move.l numBytesPerRow, d2 // 0-3 trailing bytes
- beq @nextRow
- subq.l #2, d2 // negative: one byte left; 0: two bytes left; 1: three bytes left
- bmi @moveByte
- move.w (dstPixelP), d0
- and.w (maskPixelP)+, d0
- or.w (srcPixelP)+, d0
- move.w d0, (dstPixelP)+
- tst d2
- beq @nextRow // exactly two bytes were left, so no odd byte follows
- @moveByte:
- move.b (dstPixelP), d0
- and.b (maskPixelP)+, d0
- or.b (srcPixelP)+, d0
- move.b d0, (dstPixelP)+
-
- @nextRow:
- adda.l srcRowStride, srcPixelP
- adda.l srcRowStride, maskPixelP // the mask uses the source stride
- adda.l dstRowStride, dstPixelP
-
- subq.w #1, rowsToCopy
- bne @forEachRow // loop until the row count reaches zero
-
- ASM_END
- }
-
-
-
-
- ///--------------------------------------------------------------------------------------
- // BlitPixiePartialMask - 680X0 asm: dst = (dst & mask) | (src & ~mask) for numBytesPerRow bytes in each row
- ///--------------------------------------------------------------------------------------
-
- ASM_FUNC void BlitPixiePartialMask(
- register unsigned char *srcPixelP, // source pixels; post-incremented as they are read
- register unsigned char *dstPixelP, // destination pixels; read, combined and written back
- register unsigned char *maskPixelP, // mask; post-incremented in step with the source
- register unsigned long srcRowStride, // row stride applied to both the source and the mask
- register unsigned long dstRowStride, // row stride applied to the destination
- register unsigned short numBytesPerRow, // bytes to process per row
- register unsigned short rowsToCopy) // rows to process; only tested after a row has been done
- {
- register unsigned long loopsPerRow; // full 16-longword passes per row
-
- ASM_BEGIN
- // the pointers advance by numBytesPerRow inside a row, so keep only the rest of each stride
- ext.l numBytesPerRow // NOTE(review): ext.l sign-extends although the parameter is unsigned short - confirm rows never reach 0x8000 bytes
- sub.l numBytesPerRow, srcRowStride
- sub.l numBytesPerRow, dstRowStride
-
- // longWordsPerRow = numBytesPerRow >> 2;
- moveq #0,d0
- move.w numBytesPerRow, d0 // low word only, so d0 holds the zero-extended row length
- lsr.l #2, d0
-
- // numBytesPerRow -= longWordsPerRow << 2;
- move.l d0, d1
- lsl.l #2, d1
- sub.l d1, numBytesPerRow // numBytesPerRow now holds the 0-3 bytes left over after the longwords
-
- // loopsPerRow = longWordsPerRow >> 4;
- move.l d0, loopsPerRow
- lsr.l #4, loopsPerRow
-
- // d0 = longWordsPerRow & 15: unrolled steps to run on the first, partial pass
- moveq #0xF, d1
- and.l d1, d0
- mulu #14, d0 // longWordsPerRow *= 14 (bytes in segment of loop)
- lea @loopEnd, a0 // get address of the end of the loop
- sub.l d0, a0 // calculate where to jmp in the loop
-
- @forEachRow:
- move.l loopsPerRow, d2 // d2 counts the full passes still to run after the partial one
- jmp (a0) // enter the unrolled loop part-way through
- @loopBase:
- // 16 (every step below repeats these seven instructions)
- move.l (dstPixelP), d0
- and.l (maskPixelP), d0 // d0 = dst & mask
- move.l (maskPixelP)+, d1
- not.l d1 // d1 = ~mask
- and.l (srcPixelP)+, d1 // d1 = src & ~mask
- or.l d1, d0
- move.l d0, (dstPixelP)+
- // 15
- move.l (dstPixelP), d0
- and.l (maskPixelP), d0
- move.l (maskPixelP)+, d1
- not.l d1
- and.l (srcPixelP)+, d1
- or.l d1, d0
- move.l d0, (dstPixelP)+
- // 14
- move.l (dstPixelP), d0
- and.l (maskPixelP), d0
- move.l (maskPixelP)+, d1
- not.l d1
- and.l (srcPixelP)+, d1
- or.l d1, d0
- move.l d0, (dstPixelP)+
- // 13
- move.l (dstPixelP), d0
- and.l (maskPixelP), d0
- move.l (maskPixelP)+, d1
- not.l d1
- and.l (srcPixelP)+, d1
- or.l d1, d0
- move.l d0, (dstPixelP)+
- // 12
- move.l (dstPixelP), d0
- and.l (maskPixelP), d0
- move.l (maskPixelP)+, d1
- not.l d1
- and.l (srcPixelP)+, d1
- or.l d1, d0
- move.l d0, (dstPixelP)+
- // 11
- move.l (dstPixelP), d0
- and.l (maskPixelP), d0
- move.l (maskPixelP)+, d1
- not.l d1
- and.l (srcPixelP)+, d1
- or.l d1, d0
- move.l d0, (dstPixelP)+
- // 10
- move.l (dstPixelP), d0
- and.l (maskPixelP), d0
- move.l (maskPixelP)+, d1
- not.l d1
- and.l (srcPixelP)+, d1
- or.l d1, d0
- move.l d0, (dstPixelP)+
- // 9
- move.l (dstPixelP), d0
- and.l (maskPixelP), d0
- move.l (maskPixelP)+, d1
- not.l d1
- and.l (srcPixelP)+, d1
- or.l d1, d0
- move.l d0, (dstPixelP)+
- // 8
- move.l (dstPixelP), d0
- and.l (maskPixelP), d0
- move.l (maskPixelP)+, d1
- not.l d1
- and.l (srcPixelP)+, d1
- or.l d1, d0
- move.l d0, (dstPixelP)+
- // 7
- move.l (dstPixelP), d0
- and.l (maskPixelP), d0
- move.l (maskPixelP)+, d1
- not.l d1
- and.l (srcPixelP)+, d1
- or.l d1, d0
- move.l d0, (dstPixelP)+
- // 6
- move.l (dstPixelP), d0
- and.l (maskPixelP), d0
- move.l (maskPixelP)+, d1
- not.l d1
- and.l (srcPixelP)+, d1
- or.l d1, d0
- move.l d0, (dstPixelP)+
- // 5
- move.l (dstPixelP), d0
- and.l (maskPixelP), d0
- move.l (maskPixelP)+, d1
- not.l d1
- and.l (srcPixelP)+, d1
- or.l d1, d0
- move.l d0, (dstPixelP)+
- // 4
- move.l (dstPixelP), d0
- and.l (maskPixelP), d0
- move.l (maskPixelP)+, d1
- not.l d1
- and.l (srcPixelP)+, d1
- or.l d1, d0
- move.l d0, (dstPixelP)+
- // 3
- move.l (dstPixelP), d0
- and.l (maskPixelP), d0
- move.l (maskPixelP)+, d1
- not.l d1
- and.l (srcPixelP)+, d1
- or.l d1, d0
- move.l d0, (dstPixelP)+
- // 2
- move.l (dstPixelP), d0
- and.l (maskPixelP), d0
- move.l (maskPixelP)+, d1
- not.l d1
- and.l (srcPixelP)+, d1
- or.l d1, d0
- move.l d0, (dstPixelP)+
- // 1
- move.l (dstPixelP), d0
- and.l (maskPixelP), d0
- move.l (maskPixelP)+, d1
- not.l d1
- and.l (srcPixelP)+, d1
- or.l d1, d0
- move.l d0, (dstPixelP)+
- @loopEnd:
- subq.l #1, d2
- bpl @loopBase // run another full pass of 16 steps while d2 >= 0
-
- // now do any leftover bits
- move.l numBytesPerRow, d2 // 0-3 trailing bytes
- beq @nextRow
- subq.l #2, d2 // negative: one byte left; 0: two bytes left; 1: three bytes left
- bmi @moveByte
- move.w (dstPixelP), d0
- and.w (maskPixelP), d0
- move.w (maskPixelP)+, d1
- not.w d1
- and.w (srcPixelP)+, d1
- or.w d1, d0
- move.w d0, (dstPixelP)+
- tst d2
- beq @nextRow // exactly two bytes were left, so no odd byte follows
- @moveByte:
- move.b (dstPixelP), d0
- and.b (maskPixelP), d0
- move.b (maskPixelP)+, d1
- not.b d1
- and.b (srcPixelP)+, d1
- or.b d1, d0
- move.b d0, (dstPixelP)+
-
- @nextRow:
- adda.l srcRowStride, srcPixelP
- adda.l srcRowStride, maskPixelP // the mask uses the source stride
- adda.l dstRowStride, dstPixelP
-
- subq.w #1, rowsToCopy
- bne @forEachRow // loop until the row count reaches zero
-
- ASM_END
- }
-
- #pragma mark *** Generic C:
- #elif USE_GENERIC_C
-
///--------------------------------------------------------------------------------------
//	BlitPixieMask
//
//	Masked blit: every destination byte becomes (dst & mask) | src.
//
//	srcPixelP		first byte of the first source row
//	dstPixelP		first byte of the first destination row
//	maskPixelP		first byte of the first mask row
//	srcOffset		distance in bytes between row starts, for source AND mask
//	dstOffset		distance in bytes between destination row starts
//	numBytesPerRow	bytes processed in each row
//	rowsToCopy		number of rows; 0 leaves the destination untouched
///--------------------------------------------------------------------------------------

void BlitPixieMask(
	unsigned char *srcPixelP,
	unsigned char *dstPixelP,
	unsigned char *maskPixelP,
	unsigned long srcOffset,
	unsigned long dstOffset,
	unsigned short numBytesPerRow,
	unsigned short rowsToCopy)
{
	unsigned long index;

	// the pointers walk numBytesPerRow bytes within a row, so only the
	// remainder of each stride has to be added at the end of the row
	srcOffset -= numBytesPerRow;
	dstOffset -= numBytesPerRow;

	while (rowsToCopy--)
	{
		for (index = 0; index < numBytesPerRow; index++)
		{
			// dstPixelP is read and written here and bumped in its own statement:
			// reading it and incrementing it in one expression is unsequenced
			*dstPixelP = (unsigned char) ((*dstPixelP & *maskPixelP) | *srcPixelP);
			dstPixelP++;
			maskPixelP++;
			srcPixelP++;
		}

		// bump to next row
		srcPixelP += srcOffset;
		dstPixelP += dstOffset;
		maskPixelP += srcOffset;
	}
}
-
///--------------------------------------------------------------------------------------
//	BlitPixiePartialMask
//
//	Partial-mask blit: every destination byte becomes (dst & mask) | (src & ~mask),
//	so source bits are only taken where the mask is clear.
//
//	srcPixelP		first byte of the first source row
//	dstPixelP		first byte of the first destination row
//	maskPixelP		first byte of the first mask row
//	srcOffset		distance in bytes between row starts, for source AND mask
//	dstOffset		distance in bytes between destination row starts
//	numBytesPerRow	bytes processed in each row
//	rowsToCopy		number of rows; 0 leaves the destination untouched
///--------------------------------------------------------------------------------------

void BlitPixiePartialMask(
	unsigned char *srcPixelP,
	unsigned char *dstPixelP,
	unsigned char *maskPixelP,
	unsigned long srcOffset,
	unsigned long dstOffset,
	unsigned short numBytesPerRow,
	unsigned short rowsToCopy)
{
	unsigned long index;
	unsigned char mask;

	// the pointers walk numBytesPerRow bytes within a row, so only the
	// remainder of each stride has to be added at the end of the row
	srcOffset -= numBytesPerRow;
	dstOffset -= numBytesPerRow;

	while (rowsToCopy--)
	{
		for (index = 0; index < numBytesPerRow; index++)
		{
			mask = *maskPixelP++;

			// dstPixelP is read and written here and bumped in its own statement:
			// reading it and incrementing it in one expression is unsequenced
			*dstPixelP = (unsigned char) ((*dstPixelP & mask) | (*srcPixelP & ~mask));
			dstPixelP++;
			srcPixelP++;
		}

		// bump to next row
		srcPixelP += srcOffset;
		dstPixelP += dstOffset;
		maskPixelP += srcOffset;
	}
}
-
- #endif
-
- #pragma mark -
-
- #ifndef GENERATINGASM // do not include for asm file generation
-
///--------------------------------------------------------------------------------------
//	BlitPixieMaskColor
//
//	Fills the destination with a solid color through a mask: every destination unit
//	becomes (dst & mask) | (srcColor & ~mask).
//
//	srcColor		color pattern; whole 32-bit units use its low 32 bits, a trailing
//					16-bit unit its low 16 bits and a trailing byte its low 8 bits, so
//					8- and 16-bit colors must be replicated across the value by the caller
//	dstPixelP		first byte of the first destination row
//	maskPixelP		first byte of the first mask row
//	srcOffset		distance in bytes between mask row starts
//	dstOffset		distance in bytes between destination row starts
//	numBytesPerRow	bytes processed in each row
//	rowsToCopy		number of rows; 0 leaves the destination untouched
///--------------------------------------------------------------------------------------

void BlitPixieMaskColor(
	unsigned long srcColor,
	unsigned char *dstPixelP,
	unsigned char *maskPixelP,
	unsigned long srcOffset,
	unsigned long dstOffset,
	unsigned short numBytesPerRow,
	unsigned short rowsToCopy)
{
	// fixed-width copies of the color: "unsigned long" is not 4 bytes everywhere, and
	// the row is split into numBytesPerRow >> 2 units that must be exactly 4 bytes wide
	const uint32_t color32 = (uint32_t) srcColor;
	const uint16_t color16 = (uint16_t) srcColor;
	const unsigned char color8 = (unsigned char) srcColor;
	const unsigned long wordsPerRow = (unsigned long) (numBytesPerRow >> 2);
	unsigned long index;

	srcOffset -= numBytesPerRow;
	dstOffset -= numBytesPerRow;

	while (rowsToCopy--)
	{
		// note: must do this with longs and shorts first, since depth can be both 32, 16 or 8 bits

		for (index = 0; index < wordsPerRow; index++)
		{
			uint32_t mask32;
			uint32_t dst32;

			// memcpy keeps the native byte order of a direct load/store while
			// staying valid for unaligned rows
			memcpy(&mask32, maskPixelP, sizeof mask32);
			memcpy(&dst32, dstPixelP, sizeof dst32);
			dst32 = (dst32 & mask32) | (color32 & ~mask32);
			memcpy(dstPixelP, &dst32, sizeof dst32);

			maskPixelP += sizeof mask32;
			dstPixelP += sizeof dst32;
		}

		if (numBytesPerRow & 2)
		{
			uint16_t mask16;
			uint16_t dst16;

			memcpy(&mask16, maskPixelP, sizeof mask16);
			memcpy(&dst16, dstPixelP, sizeof dst16);
			dst16 = (uint16_t) ((dst16 & mask16) | (color16 & ~mask16));
			memcpy(dstPixelP, &dst16, sizeof dst16);

			maskPixelP += sizeof mask16;
			dstPixelP += sizeof dst16;
		}

		if (numBytesPerRow & 1)
		{
			const unsigned char mask8 = *maskPixelP++;

			// dstPixelP is bumped in its own statement: reading it and
			// incrementing it in one expression is unsequenced
			*dstPixelP = (unsigned char) ((*dstPixelP & mask8) | (color8 & ~mask8));
			dstPixelP++;
		}

		// bump to next row
		dstPixelP += dstOffset;
		maskPixelP += srcOffset;
	}
}
-
- #endif
-